#!/bin/bash

# This file parses UCSC Chromosome Band table into a Go source code file.
#
# This script will only work on the Chromosome Band table - it will not
# work on an 'Assembly' or 'Scaffold' table.
#
# The prefix, e.g. chr, will be used to label the chromosomes (e.g. chr1, chr2 ... )
# By default, "chr" is used. The package will be used to name the generated package.
#
# To download data tables, see http://genome.ucsc.edu/cgi-bin/hgTables

file=$1
prefix=$2
species=$3
package=$4

if [ ! -n "$file" ]; then
	echo "Please specify the UCSC karyotype table file"
	exit
fi

if [ ! -n "$prefix" ]; then
	prefix="chr"
fi

label="$(tr '[:lower:]' '[:upper:]' <<< ${prefix:0:1})${prefix:1}"

(
	echo -e "// DO NOT EDIT. This file was autogenerated by parse.karyotype\n"
	echo "// Package $package defines chromosome and band intervals for the $species karyotype based on the $package assembly."
	echo -e "package $package\n"

	echo -e "import (\n\t\"github.com/biogo/biogo/feat\"\n\t\"github.com/biogo/biogo/feat/genome\"\n)\n"

	# # chromosomes
	echo 'var ('
	< $file zcat \
	| grep -v '^#' \
	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
	| awk '{print $1,$0}' \
	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
	| sort -k1,1g -k3rn,3 \
	| sort -u -k1g,1 \
	| awk -v prefix=$prefix -v label=$label '{print "\t"label$2" = genome.Chromosome{Chr: \""prefix$2"\", Desc: \"Chromosome\", Length:",$4"}"}'
	echo -e ')\n'
	echo 'var Chromosomes = []*genome.Chromosome{'
	< $file zcat \
	| grep -v '^#' \
	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
	| awk '{print $1,$0}' \
	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
	| sort -k1,1g -k3rn,3 \
	| sort -u -k1g,1 \
	| awk  -v label=$label '{print "\t&"label$2","}'
	echo -e '}\n'

	# bands
	echo 'var ('
	< $file zcat \
	| grep -v '^#' \
	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
	| awk '{print $1,$0}' \
	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
	| sort -k1,1g -k3n,3 \
	| awk  -v label=$label '{print "\t"label$2"_"$5" = genome.Band{Band: \""$5"\", Desc: \"Band\", StartPos:",$3", EndPos:",$4", Giemsa: \""$6"\", Chr: &"label$2"}"}' \
	| sed 's/\.\(.*=\)/_\1/'
	echo -e ')\n'
	echo 'var Bands = []*genome.Band{'
	< $file zcat \
	| grep -v '^#' \
	| sed -e 's/\t/ /g' -e 's/chr//' | tr -s ' ' \
	| awk '{print $1,$0}' \
	| sed -e 's/^[XZ]/1.1e10/' -e 's/^[YW]/1.2e10/' -e 's/^M/1.3e10/' \
	| sed -e 's/^\([1-9][0-9]*\)[lL]/\1/' -e 's/^\([1-9][0-9]*\)[rR]/\1.5/' \
	| sort -k1,1g -k3n,3 \
	| awk  -v label=$label '{print "\t&"label$2"_"$5","}' \
	| sed 's/\./_/'
	echo '}'

	# init
	cat << 'END'
//line parse.karyotype:82
func init() {
	for _, b := range Bands {
		b.Chr.(*genome.Chromosome).Features = append(b.Chr.(*genome.Chromosome).Features, b)
	}
	for _, c := range Chromosomes {
		bc := make([]feat.Feature, len(c.Features))
		copy(bc, c.Features)
		c.Features = bc
	}
}
END
) | gofmt
